********************************************************************************

* Filename: Table_7_heterogeneity.do

********************************************************************************
*  This do-file is part of the collection of replication files for Schaffner, Glewwe, and Sharma, 
*  "Why Programs Fail:  Lessons for Improving Public Service Quality from a Mixed Methods Evaluation
*   of an Unsuccessful Teacher Training Program in Nepal"
*
*This do-file calculates:
*      - all results in Table 7 of the WBER journal article
*	   - all results in Table S6 of the Supplementary Online Appendix
*	   - multiple hypothesis testing correction results related to those tables
*     
* This involves two types of estimation.  The first is IV/LATE estimation of the impact of
* having a trained teacher on student test scores, allowing for that impact to differ by
* the student's tercile of the baseline test score distribution (using weights to reflect the
* population distribution). 
*		The do-file implements adjustments to standard errors and confidence intervals, 
*		and assesses statistical significance, for the LATE estimation following:

*		Lee, David S., Justin McCrary, Marcelo J. Moreira, and Jack Porter, 2022,
*		"Valid t-Ratio Inference for IV" American Econoimc Review 112(10):3260-3290.
*
* 		Making use of adjustments in the Lee et al. paper, we also bound the p-values for the 
* 		tests of significant impact in IV estimation.  We use these p-value bounds in
* 		multiple hypothesis testing corrections for the evaluation of statistical significance.
*
* 		Because the Lee et al. adjustment applies only for models with one endogenous variable
* 		and one instrument, we allow for treatment impact to differ across the three terciles
* 		of the baseline test score distribution by running three separate regressions, rather
* 		than by pooling all students and including interaction effects between the treatment
* 		indicator and tercile dummies.
* 
* The second type of estimation uses quantile regressions to study how impact differs 
* by quantile of the unconditional test score distribution.  We do this in two ways.  
*		For the results reported in Table 7, we use unweighted quantile regressions, and
*		report bootstrapped standard errors that account for stratification and clustering.
*
*		For the results reported in Online Appendix Table S6, we use weighted quantile
*		regressions, and report standard errors that do not account for stratification and
*		clustering.  [The goal is just to demonstrate that using weights does not much
*		affect the estimates.]
*
* 		Note that to get the same bootstrapped standard errors for every regression every time,
*		it is necessary to set a seed before each bs regression command.
*
* Software version used: STATA/SE 18.0
*
********************************************************************************
*GLOBAL FILE PATH DEFINITIONS
	global PBRfolder "ADD FILE REFERENCE TO MAIN FOLDER HERE" 
	global datasets = "$PBRfolder\Datasets"
	global logs = "$PBRfolder\Logs"

********************************************************************************
* SET-UP
	set more off
	clear all
	set varabbrev on
	capture log close
	log using "$logs\Table_7_HeterogeneousImpacts", replace
	cd "$datasets"
********************************************************************************

*  Create temporary files for use in estimation

********************************************************************************

	*Create temporary files for all the full endline assessment datasets
		use Math09_endline_IRT, clear
		tempfile e_math_9
		save "`e_math_9'", replace

		use Sci09_endline_IRT, clear
		tempfile e_sci_9
		save "`e_sci_9'", replace

		use Math10_endline_IRT, clear
		tempfile e_math_10
		save "`e_math_10'", replace

		use Sci10_endline_IRT, clear
		tempfile e_sci_10
		save "`e_sci_10'", replace

	* Create temporary files for all the panel sample assessment datasets

		use Math8and9_panel_IRT, clear
		tempfile p_math_9
		save "`p_math_9'", replace 
		
		use Sci8and9_panel_IRT, clear
		tempfile p_sci_9
		save "`p_sci_9'", replace
		
		use Math9and10_panel_IRT, clear
		tempfile p_math_10
		save "`p_math_10'", replace
		
		use Sci9and10_panel_IRT, clear
		tempfile p_sci_10
		save "`p_sci_10'", replace	
		

	*Create temporary school data with study design, assent, and survey weights variables
		
		use endlinetestassent, clear
		
		*Add survey weights
			merge 1:1 schoolid using basicdata 	//0 unmatched
			drop _m
		
		*Create new study arm and strata variable = district*10+stratum
			gen dist_stratum = (district*10)+stratum
			gen treat=studyarm==1 | studyarm==2
			
		*Describe the survey data
			svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		*Save school data
			tempfile sch_temp
			save "`sch_temp'", replace
		
	*Create temporary file of teacher data that can be matched to grade 9 and 10 students
	*If more than one teacher, take average over all teachers
		
		* Get the teacher characteristics
			use Teacher_c, clear
			gen texp05yr=(t_exper<=5) if t_exper~=.
			keep teacherid t_perm texp05yr t_g9_math t_g10_math t_g9_sci t_g10_sci
			sort teacherid
			
		* Bring in data from baseline on teachers' participation in SSRP (past) trainings and save teacher data
			merge 1:1 teacherid using SSRPdata  //48 unmatched (2 master, 46 using)
			
			l teacherid if _m==1 
			*Drop obs not in main teacher data
			*drop if _m==1 //2
			keep teacherid t_perm texp05yr t_g9_math t_g10_math t_g9_sci t_g10_sci SSRP*
			sort teacherid
			tempfile tchrtemp
			save "`tchrtemp'" , replace
			
		* Bring in teacher SSDP attendance data
			use SSDP_VA_attendance, clear
			count if teacherid=="." //Cannot match these 23 teachers to students
			drop if teacherid=="." //23
							
			* Following are fixes for Jumla district
			replace ssdp_math_days=. if district==63 & ssdp_math==1 
			replace ssdp_sci_days=. if district==63 & ssdp_sci==1
			
			* Define math training as completed 6 or more days, if days missing assume > 6 
			tab ssdp_math_days ssdp_math, mi
			gen ssdp_m_t=(ssdp_math_days==6 | ssdp_math_days==10)
			replace ssdp_m_t=1 if ssdp_math==1 & ssdp_math_days==.
			label var ssdp_m_t "Math teacher had SSDP training"
			tab ssdp_m_t
			
			* Define science training as completed 9-10 days, if days missing assume > 9 
			tab ssdp_sci_days ssdp_sci, mi
			gen ssdp_s_t=(ssdp_sci_days==9 | ssdp_sci_days==10)
			replace ssdp_s_t=1 if ssdp_sci==1 & ssdp_sci_days==.
			label var ssdp_m_t "Science teacher had SSDP training"
			tab ssdp_s_t
			
			*merge in earlier teacher data
			sort teacherid
			merge 1:1 teacherid using "`tchrtemp'"  
			drop _m 
			sort teacherid
			save "`tchrtemp'" , replace

		* Match teacher characteristics to students, using average if >1 tchr
		
			* Start with datafile on students with code to match to grade 9 math teachers
				use T_stu_sections09, clear
				tab m_match_type
				drop if m_match_type==2 //745 students not matched to any math teacher
				
			* Create duplicate observations for kids matched to more than 1 teacher
			* Note that mathteacherid is "blank" if 2 different teachers exist
				l mathteacheridA mathteacheridB if mathteacherid==".d"
				tab mathteacherid if mathteacheridA~="" | mathteacheridB~=""
				d,s // 6056 observations
				expand 2 if m_match_type==3 //154 more obs created
			
				d,s //6210 (=6056+154) observations
				
				bysort stu_serial: gen stuobnum=_n
				tab stuobnum
				replace mathteacherid=mathteacheridA if m_match_type==3 & stuobnum==1
				replace mathteacherid=mathteacheridB if m_match_type==3 & stuobnum==2
				rename mathteacherid teacherid
				sort teacherid
				keep schoolid district stu_serial teacherid m_match_type
				
			* Bring in teacher data and average over teachers if students has 2 teachers 
				merge m:1 teacherid using "`tchrtemp'" //399 unmatched (399 using)  - I have 400 not 399
							
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==2 /* don't teach gr 9 math */
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==3 /* teach grade 9 math */
				drop if _m==2 //400 unmatched dropped
				 
				rename t_perm mat_t_perm
				rename texp05yr mat_t_exp05yr
				rename SSRPmath mat_t_ssrp_train
				keep stu_serial mat_t_* ssdp_m_t m_match_type
			
			*Check observation numbers and save
				sort stu_serial
				d, s /* 6210 observations, including students with 2 math teachers */
				
				collapse mat_t_perm mat_t_exp05yr mat_t_ssrp_train ssdp_m_t m_match_type, by(stu_serial) 
				d, s /* 6056 observations after averaging over math teachers */
				
				label var mat_t_perm "Math teacher has permanent contract"
				label var mat_t_exp05yr "Math teacher experience 5 years or less"
				label var mat_t_ssrp_train "Math teacher has had SSRP training"
				tempfile g09mattc
				save "`g09mattc'" , replace
				
			* Start with datafile on students with code to match to grade 10 math teachers
				use T_stu_sections10, clear
				count if stu_serial=="" //0
				drop if stu_serial=="" //0
				tab m_match_type
				tab1 mathteacherid mathteacheridA mathteacheridB if m_match_type==2  
				drop if m_match_type==2 //303, students not matched to any math teacher 
							
			* Create duplicate observations for kids matched to more than 1 teacher
			* Note that mathteacherid is "blank" if 2 different teachers exist
				l mathteacheridA mathteacheridB if mathteacherid==".d"
				tab mathteacherid if mathteacheridA~="" | mathteacheridB~=""
				d,s /* 5530 observations */
				
				expand 2 if m_match_type==3
				d,s /* 5667 (=5530+137) observations */
				
				bysort stu_serial: gen stuobnum=_n
				tab stuobnum
				replace mathteacherid=mathteacheridA if m_match_type==3 & stuobnum==1
				replace mathteacherid=mathteacheridB if m_match_type==3 & stuobnum==2
				rename mathteacherid teacherid
				sort teacherid
				keep schoolid district stu_serial teacherid m_match_type
				
			* Bring in teacher data and average over teachers if students has 2 teachers 
				merge m:1 teacherid using "`tchrtemp'" //837 unmatched (420 master, 418 using) 
				
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==2 /* a few teach gr 10 math */
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==3 /* teach grade 10 math */
				count if _m==2 & t_g10_math==1 //13
				
				drop if _m==2 //418, drop unmatched teachers, who mostly don't teach grade 10 math 
				
				rename t_perm mat_t_perm
				rename texp05yr mat_t_exp05yr
				rename SSRPmath mat_t_ssrp_train
				keep stu_serial mat_t_* ssdp_m_t m_match_type
			
			* Check observation numbers and save
				sort stu_serial
				d, s /* 5667 observations, including students with 2 math teachers */
				
				collapse mat_t_perm mat_t_exp05yr mat_t_ssrp_train ssdp_m_t m_match_type, by(stu_serial) 
				d, s /* 5530 observations after averaging over math teachers */
				
				label var mat_t_perm "Math teacher has permanent contract"
				label var mat_t_exp05yr "Math teacher experience 5 years or less"
				label var mat_t_ssrp_train "Math teacher has had SSRP training"
				tempfile g10mattc
				save "`g10mattc'" , replace
			
			*Start with datafile on students with code to match to grade 9 science teachers
				use T_stu_sections09, clear
				tab s_match_type
				tab1 sciteacherid sciteacheridA sciteacheridB if s_match_type==2  
				drop if s_match_type==2 //802, students not matched to any science teacher
				
			* Create duplicate observations for kids matched to more than 1 teacher
			* Note that sciteacherid is "blank" if 2 different teachers exist
				l sciteacheridA sciteacheridB if sciteacherid==".d"
				tab sciteacherid if sciteacheridA~="" | sciteacheridB~=""
				d,s /* 5999 observations */
				
				expand 2 if s_match_type==3
				d,s /* 6280 (=5999+281) observations */
				
				bysort stu_serial: gen stuobnum=_n
				tab stuobnum
				replace sciteacherid=sciteacheridA if s_match_type==3 & stuobnum==1
				replace sciteacherid=sciteacheridB if s_match_type==3 & stuobnum==2
				rename sciteacherid teacherid
				sort teacherid 
				keep schoolid district stu_serial teacherid s_match_type
			
			* Bring in teacher data and average over teachers if students has 2 teachers 
				merge m:1 teacherid using "`tchrtemp'" // 399 from using unmatched
							
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==2 /* don't teach gr 9 sci */
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==3 /* teach grade 9 science */
				drop if _m==2 //399, drop unmatched teachers, who don't teach grade 9 science
				
				rename t_perm sci_t_perm
				rename texp05yr sci_t_exp05yr
				rename SSRPsci sci_t_ssrp_train
				keep stu_serial sci_t_* ssdp_s_t s_match_type
				
			* Average teacher characteristics over student IDs and save
				sort stu_serial
				d, s /* 6280 observations, including students with 2 science teachers */
				
				collapse sci_t_perm sci_t_exp05yr sci_t_ssrp_train ssdp_s_t s_match_type, by(stu_serial) 
				d, s /* 5999 observations after averaging over science teachers */
				
				label var sci_t_perm "Science teacher has permanent contract"
				label var sci_t_exp05yr "Science teacher experience 5 years or less"
				label var sci_t_ssrp_train "Science teacher has had SSRP training"
				tempfile g09scitc
				save "`g09scitc'" , replace

			*Start with datafile on students with code to match to grade 10 science teachers
				use T_stu_sections10, clear
				count if stu_serial=="" //0
				drop if stu_serial=="" //0
				tab s_match_type
				tab1 sciteacherid sciteacheridA sciteacheridB if s_match_type==2  
				drop if s_match_type==2 //234, students not matched to any science teacher
				
			* Create duplicate observations for kids matched to more than 1 teacher
			* Note that sciteacherid is "blank" if 2 different teachers exist
				l sciteacheridA sciteacheridB if sciteacherid==".d"
				tab sciteacherid if sciteacheridA~="" | sciteacheridB~=""
				d,s /* 5599 observations */
				
				expand 2 if s_match_type==3
				d,s /* 5919 (=5599+320) observations */
				
				bysort stu_serial: gen stuobnum=_n
				tab stuobnum
				replace sciteacherid=sciteacheridA if s_match_type==3 & stuobnum==1
				replace sciteacherid=sciteacheridB if s_match_type==3 & stuobnum==2
				rename sciteacherid teacherid
				sort teacherid
				keep schoolid district stu_serial teacherid s_match_type
				
			* Bring in teacher data and average over teachers if students has 2 teachers 
				merge m:1 teacherid using "`tchrtemp'" //975 unmatched (558 master, 417 using) 
				
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==2 /* don't teach gr 10 sci */
				su t_g9_math t_g10_math t_g9_sci t_g10_sci if _m==3 /* teach grade 10 science */
				count if _m==2 & t_g10_sci==1 //12, Teachers for 12 kids w/ missing ID 
				drop if _m==2 //417, drop unmatched teachers, who don't teach grade 10 science
				
				rename t_perm sci_t_perm
				rename texp05yr sci_t_exp05yr
				rename SSRPsci sci_t_ssrp_train
				keep stu_serial sci_t_* ssdp_s_t s_match_type
				
			* Average teacher characteristics over student IDs and save
				sort stu_serial
				d, s /* 5919 observations, including students with 2 science teachers */
				
				collapse sci_t_perm sci_t_exp05yr sci_t_ssrp_train ssdp_s_t s_match_type, by(stu_serial) 
				d, s /* 5599 observations after averaging over science teachers */
				
				label var sci_t_perm "Science teacher has permanent contract"
				label var sci_t_exp05yr "Science teacher experience 5 years or less"
				label var sci_t_ssrp_train "Science teacher has had SSRP training"
				tempfile g10scitc
				save "`g10scitc'" , replace


****************************************************************************

****DEFINE PROGRAM FOR IMPLEMENTING tF CORRECTIONS AFTER IV/LATE ESTIMATION

****************************************************************************

	capture program drop tFcorrections

	program tFcorrections
	
		/* This program does not fully automate the corrections.  It just speeds up "by hand"
		adjustments using Table 3 in the Lee et al. paper.  The user must fill in the following
		scalars before running the program.  */
		
		
		/* Precede use of this program with this code to set up inputs:
			scalar FF =  			  	// F for test of signif of treat in first stage regression 
			scalar F5_below = 			// F in table 3a just below FF 
			scalar F5_above = 			// F in table 3a just above FF
			scalar A5_below = 			// adjustment factor in 3a for F5below
			scalar A5_above = 			// adjustment factor in 3a for F5above
			scalar F1_below = 			// F in table 3b just below FF
			scalar F1_above = 			// F in table 3b just above FF
			scalar A1_below =			// adjustment factor in 3b for F1below
			scalar A1_above =			// adjustment factor in 3b for F1above
			scalar raw_pval =			// unadjusted pvalue for treated in second stage 
			capture drop in_sample
			gen in_sample =   any if-statement for selecting the sample for iv estimation
			gen testscore =             // left hand side variable
			global reg_command "`e(command)'"  // the relevant ivregress command
		*/
			
		* Determine whether adjustments are needed
			scalar NeedAdjust5 = FF <= 104.67
			scalar NeedAdjust1 = FF<= 252.342
			
							
		* Calc 5% adjusted std error, confidence interval, and statis significance
		
			* Determine adjustment factor 
			
				scalar Adjust5 = 1
		
				if NeedAdjust5==1 {
				
					scalar Adjust5 = A5_below + [(A5_above - A5_below) * ((FF-F5_below)/(F5_above - F5_below))]
													
				}
			
			* Calc 5% adjusted standard error 
				$reg_command  // runs the ivregress command from just before this program was invoked
												
				scalar  se_adj5 = _se[treated]*Adjust5
								
			* Calc 5% adjusted confidence interval
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower5 = r(table)[5,1]  // unadjusted lower bound
				scalar upper5 = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower5 
				scalar lower5_adj5 = middle - dist*Adjust5
				scalar upper5_adj5 = middle + dist*Adjust5 
			
			* Determine whether significant at 5% level
				scalar signif5 = (0 < lower5_adj5) | (0 > upper5_adj5) 
								
		* Calc 1% adjusted std error, confidence interval, and statis significance
		
			* Determine adjustment factor 
			
				scalar Adjust1 = 1
		
				if NeedAdjust1==1 {
				
					scalar Adjust1 = A1_below + [(A1_above - A1_below) * ((FF-F1_below)/(F1_above - F1_below))]
													
				}
			

			* Calc 1% adjusted standard error 
				local reg_99 "$reg_command , level(99)"
				`reg_99'
				scalar  se_adj1 = _se[treated]*Adjust1
								
			* Calc 1% adjusted confidence interval
				
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower = r(table)[5,1]  // unadjusted lower bound
				scalar upper = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower 
				scalar lower1_adj1 = middle - dist*Adjust1
				scalar upper1_adj1 = middle + dist*Adjust1
			
								
			* Determine whether significant at 1% level
				scalar signif1 = (0< lower1_adj1) | 0 > (upper1_adj1) 
		
		
					
		* Calc 10% adjusted std error, confidence interval, and statis significance
		
			* Use 5% adjustment factor and interpret as "conservative" (in most contexts)
			
				scalar Adjust10 = Adjust5
				
			* Calc 10% adjusted standard error 
				local reg_90 "$reg_command , level(90)"
				`reg_90'
											
				scalar se_adj10 = _se[treated]*Adjust10
			
			
			* Calc 10% adjusted confidence interval (conservative)
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower = r(table)[5,1]  // unadjusted lower bound
				scalar upper = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower 
				scalar lower10_adj10 = middle - dist*Adjust10
				scalar upper10_adj10 = middle + dist*Adjust10
							
			* Determine whether significant at 10% level
				scalar signif10 = (0< lower10_adj10) | (0 > upper10_adj10) 
			
		* Calculate approximate adjusted p-value
		
			* I will use .05 adjustments to standard errors for adjusting p-values that are >=.05.
			* 	For unadjusted p-values near .05, these are approximate adjustments.
			* 	For unadjusted p-values that are substantially larger, these are upper bound adjustments (because the actual adjustments would 
			* 	inflate standard errors by less)
			* I will use the .01 adjustments to standard errors for adjusting p-values that are <.05
			*    For p-values around .01, these are approximatley correct.
			*    For p-values between .01 and .05, these are conservative.
			*    For p-values less than .01 (which I don't think is relevant for us), the true corrections would be larger.
			
			* raw pvalue
			$reg_command  // runs the ivregress command from just before this program was invoked
			test treated 
			scalar raw_pval = r(p)
								
			$reg_command  // runs the ivregress command from just before this program was invoked
			
			* replicating unadjusted p-value
			scalar tt = middle/_se[treated]
			scalar dfx = e(df_r)   // e(d_fr) stores the design df from the regression
			scalar p_noadj = 2*ttail(dfx, abs(tt))
			
			
			* Create adjusted t-ratio
			if raw_pval >=0.05 {
				scalar t_adj = middle/se_adj5 
				scalar dfx = e(df_r)
				scalar p_adj = 2*ttail(dfx,abs(t_adj))
			}
			if raw_pval<0.05 {
				scalar t_adj = middle/se_adj1 
				scalar dfx = e(df_r)
				scalar p_adj = 2*ttail(dfx,abs(t_adj))
			}
			
		* Work out stars 
		
			scalar Nstars= [signif10==1] + [signif5==1] + [signif1==1]
			
			
		* debugging report
			di "   "
			di "estimate = " middle 
			di "unadjusted std error = "  _se[treated]  "    t =  "  tt 
			di " adjusted t = "  t_adj 
		
		* Report
			di "     "
			di "     "
			di "Unadjusted standard error=     "  %9.3f _se[treated]
			di "Adjusted 5% standard error =    "  %9.3f se_adj5
			di "Adjusted 1% standard error =    "  %9.3f se_adj1
			di "Unadjusted 95% confidence interval  [ " %9.3f lower5  " , " %9.3f upper5 " ]"
			di "Adjusted 95% confidence interval  [ " %9.3f lower5_adj5 " , " %9.3f upper5_adj5 " ]"
			di "Adjusted number of stars=  " Nstars
			di "Unadjusted and adjusted p-values (adjusted upper bound if unadjusted value>.05):  "  %9.6f p_noadj "  " %9.6f p_adj 
			
	end 


****************************************************************************

****CREATE A SHORT PROGRAM JUST FOR PRINTING ADJUSTED RESULTS 

****************************************************************************	
	
	capture drop print_res
	program print_res
		
			di "     "
			di "     "
			di "Unadjusted standard error=     "  %9.3f _se[treated]
			di "Adjusted 5% standard error =    "  %9.3f se_adj5
			di "Adjusted 1% standard error =    "  %9.3f se_adj1
			di "Unadjusted 95% confidence interval  [ " %9.3f lower5  " , " %9.3f upper5 " ]"
			di "Adjusted 95% confidence interval  [ " %9.3f lower5_adj5 " , " %9.3f upper5_adj5 " ]"
			di "Adjusted number of stars=  " Nstars
			di "Unadjusted and adjusted p-values (adjusted upper bound if unadjusted value>.05):  "  %9.6f p_noadj "  " %9.6f p_adj 
			
	end



****************************************************************************

****GRADE 9 MATH PANEL SAMPLE: DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		* Bring in panel math grade 9 assessment data 
		use "`p_math_9'", clear
		
		* Select variables and observations 
		drop Math_* //Only keep latent IRT index, not each assessment item
						
		* Pick observations for which both baseline and endline assessment scores are available 
		sort stu_serial test
		by stu_serial: gen obsnumb=_n
		tab obsnumb // 4913 have two obs
		tab test obsnumb , mi
		bysort stu_serial: egen obscount=max(obsnumb)
		bysort stu_serial: gen first = _n==1
		tab obscount if first==1  // 4626 with 1, 4913 with 2
		drop if obscount==1  // students appearing only at baseline or only at endline
		
		* Construct baseline and endline score variables (Initially the baseline test and the three endline tests are ///
		in separate observations)
		gen m_theta_base=theta_panel_g8_9 if test=="Math08"
		gen m_theta_end=theta_panel_g8_9 if test=="Math09A"|test=="Math09B"|test=="Math09C" 
		gen m_theta_base_SSDP=theta_panel_g8_9_SSDP if test=="Math08"
		gen m_theta_end_SSDP=theta_panel_g8_9_SSDP if test=="Math09A"|test=="Math09B"|test=="Math09C" 
		keep district schoolid stu_serial test m_theta_base* m_theta_end* 
		sort stu_serial test  
			// this sorting is essential for what follows, each student has baseline score and one of three endline scores
		unique stu_serial  // 4913 students, 9826 observations (one baseline, one endline for each student)
		
		* Bring baseline into endline observation for same student
		replace m_theta_base=m_theta_base[_n-1] if m_theta_base==. 
				// for observations with no baseline value (i.e. endline obs) bring in lagged
		replace m_theta_base_SSDP=m_theta_base_SSDP[_n-1] if m_theta_base_SSDP==.  // ditto
		
		l district schoolid stu_serial test m_theta_base m_theta_end in 1/20 
				
		drop if m_theta_end==. // drop the baseline observations, now one obs. per student
		unique stu_serial
		

		* Merge test score data with general student data from endline
		merge 1:1 stu_serial using Grade09_c  //4625 unmatched (4625 from using)
		
		* Construct attrition status using data collected at endline
		tab s_present, m
		tab s_status if s_present==5, m
		gen str8 end_stat="g9presnt" if s_present>=1 & s_present<=4
		replace end_stat="g9absent" if s_status==1
		replace end_stat="grade8" if s_status==2 
		replace end_stat="othgovsc" if s_status==3 
		replace end_stat="priv_sch" if s_status==4
		replace end_stat="dropout" if s_status==5 
		replace end_stat="unknown" if s_status==6 
		tab end_stat, mi  
		tab end_stat _m, mi co
		tab end_stat s_baseline, mi // end_stat missing only if not in baseline 
		
		* Drop if student not present at baseline
		drop if s_baseline==0
		
		* Drop bad matches
		tab end_stat _m, mi co // 10 cases w/ test scores even though not present
		l district schoolid stu_serial m_theta_base_SSDP if _m==3 & end_stat~="g9presnt"
		drop if _m==3 & end_stat~="g9presnt" //Bad matches, 10
		tab end_stat _m, mi co
		drop _m

		* Bring in school data
		sort schoolid
		merge m:1 schoolid using "`sch_temp'"  	//0 unmatched
		drop _m		
		
		
		*Describe the survey data
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		sort stu_serial
		tab end_stat, mi
		tab end_stat [aw=sch_wght], mi 

		* Bring in data on grade 9 students math teachers
		merge 1:1 stu_serial using "`g09mattc'"  	//4975 unmatched (3279 master, 1696 using)
		
		tab _m if m_theta_base~=. & m_theta_end~=. // 88.8% match of those with tests
		tab _m s_baseline, mi //1696 are teachers of endline-only kids, so drop
		drop if _m==2 //1696
		drop _m
		
		* Drop if missing one or both test scores
		drop if m_theta_base==. | m_theta_end==. //2736 
		d,s

		* Normalize both baseline and endline by endline standard deviation
		svy, over(treat): mean m_theta_end 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen m_stdIRT_end=(m_theta_end-mean)/sd
			gen m_stdIRT_base=(m_theta_base-mean)/sd
			drop sd mean
		
		svy, over(treat): mean m_theta_end_SSDP 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen m_stdIRT_end_SSDP=(m_theta_end_SSDP-mean)/sd
			gen m_stdIRT_base_SSDP=(m_theta_base_SSDP-mean)/sd
			drop sd mean
		
		* Generate treat and treated variables
		gen treated=ssdp_m_t*treat
		
		* Save this dataset for later use
		tempfile d9mpanel
		save "`d9mpanel'", replace
		
								
	*GRADE 9 MATH PANEL SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean m_stdIRT_end
		
	
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	
 	* GRADE 9 PANEL SAMPLE, IMPACT ESTIMATION 
			
		* set test score variables
			
			gen score = m_stdIRT_end
			gen score_base = m_stdIRT_base
			
			* create tercile category variable and dummies
			xtile tercilecat = score_base [pweight = sch_wght], nquantiles(3)
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd
			
																					
		* IV/LATE ESTIMATION
								
			* IV estimate with correction for standard errors and confidence intervals following Lee et al. 2022
			* Separate for each tercile of baseline text score distribution
			
			
			* check for problem strata
				preserve
				keep if bot_3rd==1
				svydescribe if treated~=. // dist_stratum 372 
				restore 
				
				preserve
				keep if mid_3rd==1
				svydescribe if treated~=. // dist_stratum 22
				restore 
				
				preserve
				keep if top_3rd==1
				svydescribe if treated~=. // dist_stratum 22 and 632
				restore 
				
				capture drop todrop
				gen todrop = dist_stratum==372 | dist_stratum==22 | dist_stratum==632  // too few schools for stderr calc
				
				tab tercilecat if todrop~=1 & treated~=.
				
	
			*  BOTTOM TERCILE
			
				capture drop in_sample
				gen in_sample=	todrop~=1 & bot_3rd==1  
				
									
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
									
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 202.638
					
									
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9math_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	174.370		// F in table 3b just below FF
					scalar F1_above = 	252.342		// F in table 3b just above FF
					scalar A1_below =	1.097		// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 math late bot third" if _n==`j'
		
				* print unaadjusted results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9math_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
									
					
			*  	MIDDLE TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
									
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 262.471
									
										
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9math_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	252.432		// F in table 3b just below FF
					scalar F1_above = 	252.342		// F in table 3b just above FF
					scalar A1_below =	1.059		// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 math late mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9math_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
					
			*  	TOP TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample=	 todrop~=1 & top_3rd==1   
				
					
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 115.338
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9math_tercile3
					scalar rsqured=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	100.069		// F in table 3b just below FF
					scalar F1_above = 	128.950		// F in table 3b just above FF
					scalar A1_below =	1.177	// adjustment factor in 3b for F1below
					scalar A1_above =	1.136		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 math late top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9math_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
							
					
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile g9math_tercile
			save "`g9math_tercile'", replace
			list
				
	
****************************************************************************

****GRADE 10 MATH PANEL SAMPLE: DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		* Bring in panel math grade 10 assessment data 
		use "`p_math_10'", clear
		
		* Select variables and observations 
		drop Math_* //Only keep latent IRT index, not each assessment item
						
		* Pick observations for which both baseline and endline assessment scores are available 
		sort stu_serial test
		by stu_serial: gen obsnumb=_n
		tab obsnumb // 5001 have two obs
		
		tab test obsnumb , mi
		bysort stu_serial: egen obscount=max(obsnumb)
		bysort stu_serial: gen first = _n==1
		tab obscount if first==1  // 4615 with 1, 5001 with 2
		drop if obscount==1  // students appearing only at baseline or only at endline
		unique stu_serial
		
		* Construct baseline and endline score variables (Initially the baseline test and the three endline tests are ///
		in separate observations)
		gen m_theta_base=theta_panel_g9_10 if test=="Math09"
		gen m_theta_end=theta_panel_g9_10 if test=="Math10A"|test=="Math10B"|test=="Math10C" 
		gen m_theta_base_SSDP=theta_panel_g9_10_SSDP if test=="Math09"
		gen m_theta_end_SSDP=theta_panel_g9_10_SSDP if test=="Math10A"|test=="Math10B"|test=="Math10C" 
		keep district schoolid stu_serial test m_theta_base* m_theta_end* 
		sort stu_serial test  
			// this sorting is essential for what follows, each student has baseline score and one of three endline scores
		unique stu_serial  //5001 students
		
		
		* Bring baseline into endline observation for same student
		replace m_theta_base=m_theta_base[_n-1] if m_theta_base==. 
				// for observations with no baseline value (i.e. endline obs) bring in lagged
		replace m_theta_base_SSDP=m_theta_base_SSDP[_n-1] if m_theta_base_SSDP==.  // ditto
		
		l district schoolid stu_serial test m_theta_base m_theta_end in 1/20 
				
		drop if m_theta_end==. // drop the baseline observations, now one obs. per student
		unique stu_serial
		
		* Merge test score data with general student data from endline
		merge 1:1 stu_serial using Grade10_c  //4075 unmatched (from using)
		
		* Construct attrition status using data collected at endline
		tab s_present, m
		tab s_status if s_present==5, m
		gen str8 end_stat="g10prsnt" if s_present>=1 & s_present<=4
		replace end_stat="g10absnt" if s_status==1
		replace end_stat="grade9" if s_status==2 
		replace end_stat="othgovsc" if s_status==3 
		replace end_stat="priv_sch" if s_status==4
		replace end_stat="dropout" if s_status==5 
		replace end_stat="unknown" if s_status==6 
		tab end_stat, mi  
		tab end_stat _m, mi co // 9 cases with test scores even though not preset
		tab end_stat s_baseline, mi // end_stat missing only if not in baseline 
		
		l district schoolid stu_serial if _m==3 & end_stat~="g10prsnt"
		drop if _m==3 & end_stat~="g10prsnt" //Bad matches, 9
		tab end_stat _m, mi co
		drop _m
			
		* Drop if student not present at baseline
		drop if s_baseline==0
				
		* Bring in school data
		sort schoolid
		merge m:1 schoolid using "`sch_temp'"  	//13 unmatched
		drop if _m==2 // 13
		drop _m
				
		*Describe the survey data
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		sort stu_serial
		tab end_stat, mi
		tab end_stat [aw=sch_wght], mi 
		

		* Bring in data on grade 9 students math teachers
		merge 1:1 stu_serial using "`g10mattc'"  	//4320 unmatched (3513 master, 807 using)
		
		tab _m if m_theta_base~=. & m_theta_end~=. // 94.5% match of those with tests
		tab _m s_baseline, mi //807 are teachers of endline-only kids, so drop
		drop if _m==2 //807
		drop _m
		
		* Drop if missing one or both test scores
		drop if m_theta_base==. | m_theta_end==. //3244
		d,s

		* Normalize both baseline and endline by endline standard deviation
		svy, over(treat): mean m_theta_end 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen m_stdIRT_end=(m_theta_end-mean)/sd
			gen m_stdIRT_base=(m_theta_base-mean)/sd
			drop sd mean
		
		svy, over(treat): mean m_theta_end_SSDP 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen m_stdIRT_end_SSDP=(m_theta_end_SSDP-mean)/sd
			gen m_stdIRT_base_SSDP=(m_theta_base_SSDP-mean)/sd
			drop sd mean
		
		* Generate treat and treated variables
		gen treated=ssdp_m_t*treat
		
		* Save this dataset for later use
		tempfile d10mpanel
		save "`d10mpanel'", replace
	
								
	*GRADE 10 MATH PANEL SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean m_stdIRT_end
		svy, over(treat): mean m_stdIRT_end_SSDP
	
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	
 	* GRADE 10 MATH PANEL SAMPLE, IMPACT ESTIMATION 
			
		* set test score variables
			
			gen score = m_stdIRT_end
			gen score_base = m_stdIRT_base
			
			* create tercile category variable and dummies
			xtile tercilecat = score_base [pweight = sch_wght], nquantiles(3)
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd
								
																					
		* IV/LATE ESTIMATION
								
			* IV estimate with correction for standard errors and confidence intervals following Lee et al. 2022
			* Separate for each tercile of baseline text score distribution
			
			* check for problem strata
				preserve
				keep if bot_3rd==1
				svydescribe if treated~=. // dist_stratum 282 372
				restore 
			
				
				preserve
				keep if mid_3rd==1
				svydescribe if treated~=. // dist_stratum 372
				restore 
				
				preserve
				keep if top_3rd==1
				svydescribe if treated~=. // dist_stratum 372
				restore 
				
				capture drop todrop
				gen todrop = dist_stratum==282 | dist_stratum==372 // too few schools for std error calculation
				
				tab tercilecat if todrop~=1 & treated~=.
			
							
			*  BOTTOM TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample = todrop~=1 & bot_3rd==1 
			
					
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
											
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 136.84
					
												
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10math_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	128.950		// F in table 3b just below FF
					scalar F1_above =   174.370 // F in table 3b just above FF
					scalar A1_below =	1.136		// adjustment factor in 3b for F1below
					scalar A1_above =	1.097		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 math bot third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10math_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
									
			*  	MIDDLE TERCILE
			
				* set sample
				
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1 							
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
							
		
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 190.31					
										
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10math_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	174.370		// F in table 3b just below FF
					scalar F1_above = 	252.342		// F in table 3b just above FF
					scalar A1_below =	1.097		// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 math mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10math_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
							
					
			*  	TOP TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & top_3rd==1 
				
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 129.66
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10math_tercile3
					scalar rsquared=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	83.823  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.024   // adustment factor in 3a for F5below
					scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
					scalar F1_below = 	80.502		// F in table 3b just below FF
					scalar F1_above = 	100.069	// F in table 3b just above FF
					scalar A1_below =	1.220	// adjustment factor in 3b for F1below
					scalar A1_above =	1.177		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 math top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10math_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
	
				
					
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile g10math_tercile
			save "`g10math_tercile'", replace
			list
	

	
****************************************************************************

****GRADE 9 SCIENCE PANEL SAMPLE: DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		* Bring in panel science grade 9 assessment data 
		use "`p_sci_9'", clear
				
		* Select variables and observations 
		drop Sci_* //Only keep latent IRT index, not each assessment item
						
		* Pick observations for which both baseline and endline assessment scores are available 
		sort stu_serial test
		by stu_serial: gen obsnumb=_n
		tab obsnumb // 4912 2's, 1 three
		tab test obsnumb , mi
		bysort stu_serial: egen obscount=max(obsnumb)
		bysort stu_serial: gen first = _n==1
		tab obscount if first==1  // 4627 with 1, 4911 with 2, 1 with 3
		list stu_serial if obscount==3  // student serial missing
		
		drop if obscount==1 | obscount==3 // students appearing only at baseline or only at endline, and those with no serial
		
		* Construct baseline and endline score variables (Initially the baseline test and the three endline tests are ///
		in separate observations)
		gen s_theta_base=theta_panel_g8_9 if test=="Sci08"
		gen s_theta_end=theta_panel_g8_9 if test=="Sci09A"|test=="Sci09B"  
		gen s_theta_base_SSDP=theta_panel_g8_9_SSDP if test=="Sci08"
		gen s_theta_end_SSDP=theta_panel_g8_9_SSDP if test=="Sci09A"|test=="Sci09B" 
		keep district schoolid stu_serial test s_theta_base* s_theta_end* 
		sort stu_serial test  
				// this sorting is essential for what follows, each student has baseline score and one of three endline scores
		unique stu_serial  // 4911 students, 9822 observations (one baseline, one endline for each student)
		
		* Bring baseline into endline observation for same student
		replace s_theta_base=s_theta_base[_n-1] if s_theta_base==. 
				// for observations with no baseline value (i.e. endline obs) bring in lagged
		replace s_theta_base_SSDP=s_theta_base_SSDP[_n-1] if s_theta_base_SSDP==.  // ditto
		
		l district schoolid stu_serial test s_theta_base s_theta_end in 1/20 
				
		drop if s_theta_end==. // drop the baseline observations, now one obs. per student
		unique stu_serial
		

		* Merge test score data with general student data from endline
		merge 1:1 stu_serial using Grade09_c  //4627 unmatched (4625 from using)
		
		* Construct attrition status using data collected at endline
		tab s_present, m
		tab s_status if s_present==5, m
		gen str8 end_stat="g9presnt" if s_present>=1 & s_present<=4
		replace end_stat="g9absent" if s_status==1
		replace end_stat="grade8" if s_status==2 
		replace end_stat="othgovsc" if s_status==3 
		replace end_stat="priv_sch" if s_status==4
		replace end_stat="dropout" if s_status==5 
		replace end_stat="unknown" if s_status==6 
		tab end_stat, mi  
		tab end_stat _m, mi co
		tab end_stat s_baseline, mi // end_stat missing only if not in baseline 
		
		* Drop if student not present at baseline
		drop if s_baseline==0
		
		* Drop bad matches
		tab end_stat _m, mi co // 10 cases w/ test scores even though not present
		l district schoolid stu_serial s_theta_base_SSDP if _m==3 & end_stat~="g9presnt"
		drop if _m==3 & end_stat~="g9presnt" //Bad matches, 10
		tab end_stat _m, mi co
		drop _m

		* Bring in school data
		sort schoolid
		merge m:1 schoolid using "`sch_temp'"  	//0 unmatched
		drop _m		
			
		
		*Describe the survey data
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		sort stu_serial
		tab end_stat, mi
		tab end_stat [aw=sch_wght], mi 

		* Bring in data on grade 9 students science teachers
		merge 1:1 stu_serial using "`g09scitc'"  	//4924 unmatched (3282 master, 1642 using)
		
		tab _m if s_theta_base~=. & s_theta_end~=. // 88.7% match of those with tests
		tab _m s_baseline, mi //1642 are teachers of endline-only kids, so drop
		drop if _m==2 //1642
		drop _m
		
		* Drop if missing one or both test scores
		drop if s_theta_base==. | s_theta_end==. //2738 
		d,s

		* Normalize both baseline and endline by endline standard deviation
		svy, over(treat): mean s_theta_end 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen s_stdIRT_end=(s_theta_end-mean)/sd
			gen s_stdIRT_base=(s_theta_base-mean)/sd
			drop sd mean
		
		svy, over(treat): mean s_theta_end_SSDP 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen s_stdIRT_end_SSDP=(s_theta_end_SSDP-mean)/sd
			gen s_stdIRT_base_SSDP=(s_theta_base_SSDP-mean)/sd
			drop sd mean
		
		* Generate treat and treated variables
		gen treated=ssdp_s_t*treat
		
		* Save this dataset for later use
		tempfile d9spanel
		save "`d9spanel'", replace
		
									
	*GRADE 9 science PANEL SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean s_stdIRT_end
		svy, over(treat): mean s_stdIRT_end_SSDP
	
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	
 	* GRADE 9 SCIENCE PANEL SAMPLE, IMPACT ESTIMATION 
		
		* set test score variables
			
			gen score = s_stdIRT_end
			gen score_base = s_stdIRT_base
			
			* create tercile category variable and dummies
			xtile tercilecat = score_base [pweight = sch_wght], nquantiles(3)
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd	
			
																					
		* IV/LATE ESTIMATION
								
			* IV estimate with correction for standard errors and confidence intervals following Lee et al. 2022
			* Separate for each tercile of baseline text score distribution
			
			* check for problem strata
				preserve
				keep if bot_3rd==1
				svydescribe if treated~=. // dist_stratum 112 282 372
				restore 
								
				preserve
				keep if mid_3rd==1
				svydescribe if treated~=. // dist_stratum all ok
				restore 
				
				preserve
				keep if top_3rd==1
				svydescribe if treated~=. // dist_stratum all ok
				restore 
				
				capture drop todrop
				gen todrop = dist_stratum==372 | dist_stratum==112 | dist_stratum==282  // too few schools for std error calculation
				
				tab tercilecat if todrop~=1 & treated~=.
			
							
			*  BOTTOM TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample = todrop~=1 &  bot_3rd==1 			
					
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
											
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 32.47
					
												
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9sci_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	29.967  //  F in table 3a just below FF 
					scalar F5_above = 	33.457  // F in table 3a just above FF
					scalar A5_below = 	1.200 // adustment factor in 3a for F5below
					scalar A5_above = 	1.173  // adjustment factor in 3a for F5above
					scalar F1_below = 	30.383		// F in table 3b just below FF
					scalar F1_above =   33.634 // F in table 3b just above FF
					scalar A1_below =	1.563		// adjustment factor in 3b for F1below
					scalar A1_above =	1.509		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 science bot third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9sci_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7 
					
														
			*  	MIDDLE TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1 
							
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
							
		
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 59.35					
										
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9sci_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	57.902 //  F in table 3a just below FF 
					scalar F5_above = 	68.930  // F in table 3a just above FF
					scalar A5_below = 	1.071 // adustment factor in 3a for F5below
					scalar A5_above = 	1.047  // adjustment factor in 3a for F5above
					scalar F1_below = 	56.324		// F in table 3b just below FF
					scalar F1_above = 	66.592		// F in table 3b just above FF
					scalar A1_below =	1.309		// adjustment factor in 3b for F1below
					scalar A1_above =	1.264		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 science mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9sci_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
						
					
			*  	TOP TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & top_3rd==1 
				
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 41.68
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g9sci_tercile3
					scalar rsquared=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	37.699  //  F in table 3a just below FF 
					scalar F5_above = 	42.930  // F in table 3a just above FF
					scalar A5_below = 	1.147   // adustment factor in 3a for F5below
					scalar A5_above = 	1.121  // adjustment factor in 3a for F5above
					scalar F1_below = 	42.416		// F in table 3b just below FF
					scalar F1_above = 	48.511	// F in table 3b just above FF
					scalar A1_below =	1.406	// adjustment factor in 3b for F1below
					scalar A1_above =	1.357		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G9 science top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g9sci_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
					
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile g9sci_tercile
			save "`g9sci_tercile'", replace
			list
	
****************************************************************************

****GRADE 10 SCIENCE PANEL SAMPLE: DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		* Bring in panel science grade 10 assessment data 
		use "`p_sci_10'", clear
		
		
		* Select variables and observations 
		drop Sci_* //Only keep latent IRT index, not each assessment item
						
		* Pick observations for which both baseline and endline assessment scores are available 
		sort stu_serial test
		by stu_serial: gen obsnumb=_n
		tab obsnumb 
		
		tab test obsnumb , mi
		bysort stu_serial: egen obscount=max(obsnumb)
		bysort stu_serial: gen first = _n==1
		tab obscount if first==1  // 4615 with 1, 5001 with 2

		drop if obscount==1 | obscount==3 // students appearing only at baseline or only at endline, and those with no serial
		
		
		* Construct baseline and endline score variables (Initially the baseline test and the three endline tests are ///
		in separate observations)
		gen s_theta_base=theta_panel_g9_10 if test=="Sci09"
		gen s_theta_end=theta_panel_g9_10 if test=="Sci10A"|test=="Sci10B"  
		gen s_theta_base_SSDP=theta_panel_g9_10_SSDP if test=="Sci09"
		gen s_theta_end_SSDP=theta_panel_g9_10_SSDP if test=="Sci10A"|test=="Sci10B" 
		keep district schoolid stu_serial test s_theta_base* s_theta_end* 
		sort stu_serial test  
				// this sorting is essential for what follows, each student has baseline score and one of three endline scores
		unique stu_serial  // 5001 students, 10002 obs
		
		* Bring baseline into endline observation for same student
		replace s_theta_base=s_theta_base[_n-1] if s_theta_base==. 
				// for observations with no baseline value (i.e. endline obs) bring in lagged
		replace s_theta_base_SSDP=s_theta_base_SSDP[_n-1] if s_theta_base_SSDP==.  // ditto
		drop if s_theta_end==. // now just one obs per student
		
		l district schoolid stu_serial test s_theta_base s_theta_end in 1/20 
		l district schoolid stu_serial test s_theta_base s_theta_end if schoolid==4201
		drop if stu_serial==""		// 2
		unique stu_serial
		duplicates tag stu_serial, generate(dupprob)
		list stu_serial if dupprob==1
		drop if dupprob==1
		drop dupprob
		
		* Merge test score data with general student data from endline
		merge 1:1 stu_serial using Grade10_c  //4007 unmatched, from using
		
		
		* Construct attrition status using data collected at endline
		tab s_present, m
		tab s_status if s_present==5, m
		gen str8 end_stat="g10prsnt" if s_present>=1 & s_present<=4
		replace end_stat="g10absent" if s_status==1
		replace end_stat="grade9" if s_status==2 
		replace end_stat="othgovsc" if s_status==3 
		replace end_stat="priv_sch" if s_status==4
		replace end_stat="dropout" if s_status==5 
		replace end_stat="unknown" if s_status==6 
		tab end_stat, mi  
		tab end_stat _m, mi co  // 9 problems
		drop if _m==3 & end_stat~="g10prsnt" //9
		tab end_stat s_baseline, mi // end_stat missing only if not in baseline 
		drop _m
		
		* Drop if student not present at baseline
		drop if s_baseline==0
			
		* Bring in school data
		sort schoolid
		merge m:1 schoolid using "`sch_temp'"  	//13 unmatched
		drop if _merge==2
		drop _merge
		
		*Describe the survey data
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		sort stu_serial
		tab end_stat, mi
		tab end_stat [aw=sch_wght], mi 

		* Bring in data on grade 10 students science teachers
		merge 1:1 stu_serial using "`g10scitc'"  	//4257 unmatched (3447 master, 810 using)
		
		tab _m if s_theta_base~=. & s_theta_end~=. // 95.8% match of those with tests
		tab _m s_baseline, mi //
		drop if _m==2 //810
		drop _m
		
		* Drop if missing one or both test scores
		drop if s_theta_base==. | s_theta_end==. // 
		unique stu_serial //4990 

		* Normalize both baseline and endline by endline standard deviation
		svy, over(treat): mean s_theta_end 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen s_stdIRT_end=(s_theta_end-mean)/sd
			gen s_stdIRT_base=(s_theta_base-mean)/sd
			drop sd mean
		
		svy, over(treat): mean s_theta_end_SSDP 
			mat b=e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen s_stdIRT_end_SSDP=(s_theta_end_SSDP-mean)/sd
			gen s_stdIRT_base_SSDP=(s_theta_base_SSDP-mean)/sd
			drop sd mean
		
		* Generate treat and treated variables
		gen treated=ssdp_s_t*treat
		
		* Save this dataset for later use
		tempfile d10spanel
		save "`d10spanel'", replace
	
									
	*GRADE 10 SCIENCE PANEL SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean s_stdIRT_end
		svy, over(treat): mean s_stdIRT_end_SSDP
	
	
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	* GRADE 10 SCIENCE PANEL SAMPLE, IMPACT ESTIMATION 
		
		* set test score variables
			
			gen score = s_stdIRT_end
			gen score_base = s_stdIRT_base
			
			* create tercile category variable and dummies
			xtile tercilecat = score_base [pweight = sch_wght], nquantiles(3)
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd	
			
			
																					
		* IV/LATE ESTIMATION
								
			* IV estimate with correction for standard errors and confidence intervals following Lee et al. 2022
			* Separate for each tercile of baseline text score distribution
			
			* check for problem strata
				preserve
				keep if bot_3rd==1
				svydescribe if treated~=. // dist_stratum 342 372
				restore 
				
				preserve
				keep if mid_3rd==1
				svydescribe if treated~=. // dist_stratum 372
				restore 
				
				preserve
				keep if top_3rd==1
				svydescribe if treated~=. // dist_stratum all ok
				restore 
				
				capture drop todrop
				gen todrop = dist_stratum==372 | dist_stratum==342  // too few schools for std error calculation
				
				tab tercilecat if todrop~=1 & treated~=.
			
							
			*  BOTTOM TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample = todrop~=1 & bot_3rd==1 
			
					
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
											
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 58.24
					
												
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10sci_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	57.902 //  F in table 3a just below FF 
					scalar F5_above = 	68.930  // F in table 3a just above FF
					scalar A5_below = 	1.071 // adustment factor in 3a for F5below
					scalar A5_above = 	1.047 // adjustment factor in 3a for F5above
					scalar F1_below = 	56.324		// F in table 3b just below FF
					scalar F1_above =   66.592 // F in table 3b just above FF
					scalar A1_below =	1.309		// adjustment factor in 3b for F1below
					scalar A1_above =	1.264		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 science bot third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10sci_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7   
					
														
			*  	MIDDLE TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1 
							
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
							
		
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 74.87					
										
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10sci_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	68.930 //  F in table 3a just below FF 
					scalar F5_above = 	83.823  // F in table 3a just above FF
					scalar A5_below = 	1.047 // adustment factor in 3a for F5below
					scalar A5_above = 	1.024  // adjustment factor in 3a for F5above
					scalar F1_below = 	66.592		// F in table 3b just below FF
					scalar F1_above = 	80.502		// F in table 3b just above FF
					scalar A1_below =	1.264		// adjustment factor in 3b for F1below
					scalar A1_above =	1.220		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 science mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10sci_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
						
					
			*  	TOP TERCILE
			
			* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & top_3rd==1 
				
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 47.76
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store g10sci_tercile3
					scalar rsquared=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	42.930  //  F in table 3a just below FF 
					scalar F5_above = 	49.495  // F in table 3a just above FF
					scalar A5_below = 	1.121  // adustment factor in 3a for F5below
					scalar A5_above = 	1.096  // adjustment factor in 3a for F5above
					scalar F1_below = 	42.416		// F in table 3b just below FF
					scalar F1_above = 	48.511	// F in table 3b just above FF
					scalar A1_below =	1.406	// adjustment factor in 3b for F1below
					scalar A1_above =	1.357		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "G10 science top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(g10sci_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
			
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile g10sci_tercile
			save "`g10sci_tercile'", replace
			list
			clear	
	

****************************************************************************

****POOLED GRADES 9 AND 10 MATH PANEL SAMPLE DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		use "`d9mpanel'", clear
		gen grade10=0
		
		append using "`d10mpanel'"
		replace grade10=1 if grade10==.
		gen score= m_stdIRT_end
		gen score_base = m_stdIRT_base
		
		* check stats
		tab grade10, miss
		tab grade10, summ(score)

		* set test score variables
				
			* create tercile category variable and dummies, separately by grade
			* grade 9 
			xtile tercilecat9 = score_base [pweight = sch_wght] if grade10==0, nquantiles(3)
			xtile tercilecat10 = score_base [pweight = sch_wght] if grade10==1, nquantiles(3)
			gen tercilecat = tercilecat9 if grade10==0
			replace tercilecat = tercilecat10 if grade10==1
			
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd	
		
	
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	* REPLICATE GRADE 9 MATH FULL SAMPLE RESULTS, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
		
		* New grade 9
		
		svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if grade10==0  & bot_3rd==1 & dist_stratum~=372 & dist_stratum~=22 & dist_stratum~=632
		test treated
		estimates store g9math_tercile1_p
							
		* SHOW COMPARISON						 
		etable, column(estimates) estimates(g9math_tercile1 g9math_tercile1_p) showstars showstarsnote keep(treat treated) ///
		cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		// matches
		
						
	* REPLICATE GRADE 10 MATH FULL SAMPLE RESULTS, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
	
		* New grade 10
		svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if grade10==1  & bot_3rd==1 & dist_stratum~=372 & dist_stratum~=282 
		test treated
		estimates store g10math_tercile1_p
							
		* SHOW COMPARISON						 
		etable, column(estimates) estimates(g10math_tercile1 g10math_tercile1_p) showstars showstarsnote keep(treat treated) ///
		cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		// matches
					

	** POOLED ESTIMATION 
	
		* Adjust stratum variable
		tab stratum
		tab district 
		summ dist_stratum
		gen year_dist_strat = (grade10*1000)+dist_stratum
		*tab year_dist_strat
	
		* define survey structure
		svyset schoolid [pweight=sch_wght], strata(year_dist_strat) 
							
				
		* LATE ESTIMATION	
		
		*  BOTTOM TERCILE
			
				* set sample
				capture drop todrop
				egen todrop = anymatch(year_dist_strat), values(372 22 632 1282 1372)
				
				tab tercilecat if treated~=. & todrop~=1
				
				capture drop in_sample				
				gen in_sample = todrop~=1 & bot_3rd==1
				
												
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
											
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 292.43
					
												
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_math_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67 //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00 // adustment factor in 3a for F5below
					scalar A5_above = 	1.00 // adjustment factor in 3a for F5above
					scalar F1_below = 	252.342		// F in table 3b just below FF
					scalar F1_above =   252.342 // F in table 3b just above FF
					scalar A1_below =	1.059		// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled math bot third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_math_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7  
					
					
														
			*  	MIDDLE TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1 							
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
							
		
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 400.83
										
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_math_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67 //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00 // adustment factor in 3a for F5below
					scalar A5_above = 	1.00 // adjustment factor in 3a for F5above
					scalar F1_below = 	252.342		// F in table 3b just below FF
					scalar F1_above =   252.342 // F in table 3b just above FF
					scalar A1_below =	1.059		// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled math mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_math_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
						
					
			*  	TOP TERCILE
							
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & top_3rd==1 			
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 191.988
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_math_tercile3
					scalar rsquared=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.000  // adustment factor in 3a for F5below
					scalar A5_above = 	1.000  // adjustment factor in 3a for F5above
					scalar F1_below = 	174.370		// F in table 3b just below FF
					scalar F1_above = 	252.342 // F in table 3b just above FF
					scalar A1_below =	1.097	// adjustment factor in 3b for F1below
					scalar A1_above =	1.059		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled math top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_math_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
			
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile pooled_math_tercile
			save "`pooled_math_tercile'", replace
			list
			clear	
			

****************************************************************************

****POOLED GRADES 9 AND 10 SCIENCE PANEL SAMPLE: DATA MERGING AND ESTIMATION OF IMPACTS THAT
****DIFFER BY TERCILE OF THE BASELINE TEST SCORE DISTRIBUTION

****************************************************************************	

		use "`d9spanel'", clear
		gen grade10=0
		
		append using "`d10spanel'"
		replace grade10=1 if grade10==.
		gen score= s_stdIRT_end
		gen score_base = s_stdIRT_base
		
		* check stats
		tab grade10, miss
		tab grade10, summ(score)
				
			
		* set test score variables
							
			* create tercile category variable and dummies, separately by grade
			* grade 9 
			xtile tercilecat9 = score_base [pweight = sch_wght] if grade10==0, nquantiles(3)
			xtile tercilecat10 = score_base [pweight = sch_wght] if grade10==1, nquantiles(3)
			gen tercilecat = tercilecat9 if grade10==0
			replace tercilecat = tercilecat10 if grade10==1
			
			gen bot_3rd= tercilecat==1
			gen mid_3rd = tercilecat==2
			gen top_3rd = tercilecat==3
			summ bot_3rd mid_3rd top_3rd	
	
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	* REPLICATE GRADE 9 SCIENCE FULL SAMPLE RESULTS, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
		
		* New grade 9
		svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if grade10==0  & bot_3rd==1 & dist_stratum~=112 & dist_stratum~=282 & dist_stratum~=372 
		test treated
		estimates store g9sci_tercile1_p
							
		* SHOW COMPARISON						 
		etable, column(estimates) estimates(g9sci_tercile1 g9sci_tercile1_p) showstars showstarsnote keep(treat treated) ///
		cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		// matches
		
						
	* REPLICATE GRADE 10 SCIENCE FULL SAMPLE RESULTS, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
	
		* New grade 10
		svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if grade10==1  & bot_3rd==1 & dist_stratum~=372 & dist_stratum~=342
		test treated
		estimates store g10sci_tercile1_p
							
		* SHOW COMPARISON						 
		etable, column(estimates) estimates(g10sci_tercile1 g10sci_tercile1_p) showstars showstarsnote keep(treat treated) ///
		cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
		// matches
						

	** POOLED ESTIMATION 
			
		* Adjust stratum variable
		tab stratum
		tab district 
		summ dist_stratum
		gen year_dist_strat = (grade10*1000)+dist_stratum
		*tab year_dist_strat
	
		* set survey design
		svyset schoolid [pweight=sch_wght], strata(year_dist_strat) 
				
					
		* LATE ESTIMATION	
		
			capture drop todrop
			egen todrop=anymatch(year_dist_strat), values(112 282 372 1342 1372)
			
			tab tercilecat if treated~=. & todrop~=1
		
					
		*  BOTTOM TERCILE
											
				* set sample
				capture drop in_sample
				gen in_sample = todrop~=1 & bot_3rd==1 
								
				* first stage regression
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1
											
					
				* grab F stat from first stage regression for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 73.95
					
												
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_sci_tercile1
					scalar rsquared=e(r2)
										
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	68.930 //  F in table 3a just below FF 
					scalar F5_above = 	83.823  // F in table 3a just above FF
					scalar A5_below = 	1.047 // adustment factor in 3a for F5below
					scalar A5_above = 	1.024 // adjustment factor in 3a for F5above
					scalar F1_below = 	66.592		// F in table 3b just below FF
					scalar F1_above =   80.502  // F in table 3b just above FF
					scalar A1_below =	1.264		// adjustment factor in 3b for F1below
					scalar A1_above =	1.220		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled sci bot third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_sci_tercile1) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7  
					
																			
			*  	MIDDLE TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & mid_3rd==1  						
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
							
		
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 113.62
										
				
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_sci_tercile2
					scalar rsquared=e(r2)
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					
					global reg_command "`e(cmdline)'"
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	104.67 //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.00 // adustment factor in 3a for F5below
					scalar A5_above = 	1.00 // adjustment factor in 3a for F5above
					scalar F1_below = 	100.069	// F in table 3b just below FF
					scalar F1_above =   128.950 // F in table 3b just above FF
					scalar A1_below =	1.177		// adjustment factor in 3b for F1below
					scalar A1_above =	1.136		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
							
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled sci mid third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_sci_tercile2) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
											
			*  	TOP TERCILE
			
				* set sample
				capture drop in_sample
				gen in_sample= todrop~=1 & top_3rd==1
				
							
				* first stage
				svy: reg treated treat Asnt_aft Math_1st district#stratum if in_sample==1 
								
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
					// 87.59
					
					
				* unadjusted second stage estimation 
					svy: ivregress 2sls score Asnt_aft Math_1st district#stratum (treated = treat) if in_sample==1 
					estimates store pooled_sci_tercile3
					scalar rsquared=e(r2)
					
					
				* run program for adjustments 
					
					capture drop testscore
					gen testscore = score
					global reg_command "`e(cmdline)'"
					
					/* If FF is greater than the highest F value lists in Table 3A or Table 3B of Lee et al., fill in the higher F stat value ///
							in the table for the "below" and "above numbers. For example, for FF=237.2, fill in 104.67 for F5_below and F5_above */
					scalar F5_below = 	83.823  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.024  // adustment factor in 3a for F5below
					scalar A5_above = 	1.000  // adjustment factor in 3a for F5above
					scalar F1_below = 	80.502		// F in table 3b just below FF
					scalar F1_above = 	100.069 // F in table 3b just above FF
					scalar A1_below =	1.220	// adjustment factor in 3b for F1below
					scalar A1_above =	1.177		// adjustment factor in 3b for F1above
													
					tFcorrections // running program
			
		
				* store p-value for later multiple hypothesis testing correction
					local j = `j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace specification = "pooled sci top third" if _n==`j'
		
				* print results
					di "SPECIFICATION IS:  " specification[`j']
					etable, column(index) estimates(pooled_sci_tercile3) showstars showstarsnote keep(treated) /// 
					cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
					mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  // Print unadjusted results
					di "r-squared=  " rsquared  // less rounded
							
					print_res  // print adjusted results for Table 7
					
							
		* save the 3 collected p-values and their specifications in a small dataset	
			keep if _n<=3
			keep p1 p2 specification
			tempfile pooled_sci_tercile
			save "`pooled_sci_tercile'", replace
			list
						
				clear
									
***************************************************************************

****MULTIPLE HYPOTHESIS TESTING CORRECTIONS FOR TERCILE REGRESSIONS

****************************************************************************	

	* READ IN ALL THE COLLECTED P-VALUES from impact estimation by tercile
		
			use "`g9math_tercile'"
			append using "`g10math_tercile'"
			append using "`g9sci_tercile'"
			append using "`g10sci_tercile'"
			append using "`pooled_math_tercile'"
			append using "`pooled_sci_tercile'"
			list
			
			save collected_pvals, replace 
			
				
	*  SAVE ALL
			tempfile alltercilepvalues
			save "`alltercilepvalues'", replace 
			
	* KEEP ONLY THOSE FOR THE GRADE-SPECIFIC ESTIMATION 

		keep if _n<=12 /* only the grade-specific estimates */
		
		* p-value adjustments using the raw p-values
		qqvalue p1, method(yekutieli) qvalue(q1)  // this uses unadjusted pvalues for late
		list specification p1 q1
		
		* p-value adjustments using the p-values after using the Lee et al correction based on first stage F stat
		
		qqvalue p2, method(yekutieli) qvalue(q2)  // this uses adjusted pvalues for late
		list specification p2 q2
		
	
****************************************************************************

****GRADE 9 MATH FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS 

****************************************************************************	

		* Bring in endline math grade 9 assessment data 
		use "`e_math_9'", clear
		
		*create raw score
		egen RawMath9=rowtotal(Math_*)
		drop Math_*
		
		* merge in school info
		sort schoolid
		merge m:1 schoolid using "`sch_temp'" //0 unmatched
		
		drop _m
		sort stu_serial
		
		* Merge in data on grade 9 students
			merge 1:1 stu_serial using Grade09_c
			
		* Check for availability of father education variable
			tab f_educlevel _m, mi /* Missing for only 8 of matched observations */
			l district schoolid stu_serial if _m==1
			l district schoolid stu_serial if _m==3 & f_educlevel==.
			drop if _m~=3
			drop _m

		* Merge in data on grade 9 students matched with math teachers
			merge 1:1 stu_serial using "`g09mattc'" //760 unmatched (752 master, 8 using)
			
			drop if _m==2 //8
			drop _m

		* Standardize grade 9 math IRT score, 1st for all items then for SSDP-focus items and for raw scores
			svy, over(treat): mean theta_gr09 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTmatgr9=(theta_gr09-mean)/sd
			drop mean sd
	
			svy, over(treat): mean theta_gr09_SSDP 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTmatgr9_SSDP=(theta_gr09_SSDP-mean)/sd
			drop mean sd
							
		* Generate dummy variable for LATE regressions: teacher actually trained in relevant SSDP training
			gen tchrtreat_m=ssdp_m_t*treat
			
		*Label variables
			la var stdIRTmatgr9 "Grade 9 Math - All"
			la var stdIRTmatgr9_SSDP "Grade 9 Math - SSDP"
			
			
		* Save data for grade 9, math, full sample
			gen treated=tchrtreat_m
		
			tempfile d9mfull
			save "`d9mfull'", replace		

								
	*GRADE 9 MATH FULL ENDLINE SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean stdIRTmatgr9
		
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
	
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1 -- only for LATE
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2 -- only for LATE
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	

	* GRADE 9 MATH FULL ENDLINE SAMPLE, IMPACT ESTIMATION 
			
		* set test score variable
			gen score = stdIRTmatgr9
						
		* Quantile regressions with clustered standard errors (via bootstrapping), but no weights  (Table 7)
			
			set seed 1001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50) : qreg score treat, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 10th quantile" if _n==`j'
			
			set seed 2001
		
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 25th quantile" if _n==`j'
			
			set seed 3001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 50th quantile"	if _n==`j'
			
			set seed 4001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 75th quantile" if _n==`j'
			
			set seed 5001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 90th quantile" if _n==`j'
			
			* print results
			di "GRADE 9 MATH FULL SAMPLE QUANTILE REGRESSIONS, Table 7"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
		
					
			* With weights, but no clustering  (Table S6)
			qreg score treat [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 10th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 25th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 50th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 75th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 math full itt 90th quantile weights" if _n==`j'
								
				
			di "GRADE 9 MATH FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering, Table S6"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
					
					
		* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile g9_math_quantile
			save "`g9_math_quantile'", replace
			list
						
			clear
		


****************************************************************************

****GRADE 10 MATH FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS

****************************************************************************	
	
		* Bring in endline math grade 10 assessment data 
		use "`e_math_10'", clear
			
		
		*create raw score
		egen RawMath10=rowtotal(Math_*)
		drop Math_*
		
		* merge in school info
		sort schoolid
		merge m:1 schoolid using "`sch_temp'" //0 unmatched
		list distname study schoolid if _m~=3 
		drop if _m~=3 //14
		drop _m
		sort stu_serial
		
				
		* Merge in data on grade 10 students
			merge 1:1 stu_serial using Grade10_c
			
		* Check for availability of father education variable
			tab f_educlevel _m, mi /* Missing for only 4 of matched observations */
			
			l district schoolid stu_serial if _m==1
			l district schoolid stu_serial if _m==3 & f_educlevel==.
			drop if _m~=3
			drop _m

		* Merge in data on grade 9 students matched with math teachers
			merge 1:1 stu_serial using "`g10mattc'" //310 unmatched (306 master, 4 using)
			drop if _m==2 //4
			drop _m

		* Standardize grade 9 math IRT score, 1st for all items then for SSDP-focus items and for raw scores
			svy, over(treat): mean theta_gr10 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTmatgr10=(theta_gr10-mean)/sd
			drop mean sd
	
			svy, over(treat): mean theta_gr10_SSDP 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTmatgr10_SSDP=(theta_gr10_SSDP-mean)/sd
			drop mean sd
							
		* Generate dummy variable for LATE regressions: teacher actually trained in relevant SSDP training
			gen tchrtreat_m=ssdp_m_t*treat
			
		*Label variables
			la var stdIRTmatgr10 "Grade 10 Math - All"
			la var stdIRTmatgr10_SSDP "Grade 10 Math - SSDP"
			
			
		* Save data for grade 9, math, full sample
			gen treated=tchrtreat_m
		
			tempfile d10mfull
			save "`d10mfull'", replace
			
	
								
	*GRADE 10 MATH FULL ENDLINE SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean stdIRTmatgr10
		
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
	
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	* set test score variable
			gen score = stdIRTmatgr10
	
	* Quantile regressions with clustered standard errors (via bootstrapping), but no weights  (Table 7)
	
			set seed 6001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 10th quantile" if _n==`j'
			
			set seed 7001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 25th quantile" if _n==`j'
			
			set seed 8001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 50th quantile"	 if _n==`j'		
			
			set seed 9001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 75th quantile" if _n==`j'
			
			set seed 10001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 90th quantile" if _n==`j'
			
			* print results
			di "GRADE 10 MATH FULL SAMPLE QUANTILE REGRESSIONS Table 7"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
				
			
			* With weights, but no clustering  (Table S6)
			qreg score treat [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 10th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 25th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 50th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 75th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			estimates store quantile90_m_g10_check  // saving for checking pooling
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 math full itt 90th quantile weights" if _n==`j'
								
				
			di "GRADE 10 MATH FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering, Table S6"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			
			
			
		* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile g10_math_quantile
			save "`g10_math_quantile'", replace
			list
						
			clear
		
	
****************************************************************************

****GRADE 9 SCIENCE FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS 

****************************************************************************	
	
		* Bring in endline science grade 9 assessment data 
		use "`e_sci_9'", clear
		
				*create raw score
		egen RawSci9=rowtotal(Sci_*)
		drop Sci_*
		
		* merge in school info
		sort schoolid
		merge m:1 schoolid using "`sch_temp'" //0 unmatched
		drop _m
		count if stu_serial=="" // 3 observations with missing stu_serial
		drop if stu_serial=="" //3
		sort stu_serial
		
	
		* Merge in data on grade 9 students
			merge 1:1 stu_serial using Grade09_c
			
			
		* Check for availability of father education variable
			tab f_educlevel _m, mi /* Missing for only 8 of matched observations */
			l district schoolid stu_serial if _m==1
			l district schoolid stu_serial if _m==3 & f_educlevel==.
			drop if _m~=3
			drop _m

		* Merge in data on grade 9 students matched with science teachers
			merge 1:1 stu_serial using "`g09scitc'" //763 unmatched (752 master, 10 using)
			
			drop if _m==2 //10
			drop _m
			
			
		* Standardize grade 9 science IRT score, 1st for all items then for SSDP-focus items and for raw scores
			svy, over(treat): mean theta_gr09 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTscigr9=(theta_gr09-mean)/sd
			drop mean sd
	
			svy, over(treat): mean theta_gr09_SSDP 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTscigr9_SSDP=(theta_gr09_SSDP-mean)/sd
			drop mean sd
			
		* Generate dummy variable for LATE regressions: teacher actually trained in relevant SSDP training
			gen tchrtreat_s=ssdp_s_t*treat

	
		*Label variables
			la var stdIRTscigr9 "Grade 9 Science - All"
			la var stdIRTscigr9_SSDP "Grade 9 Science - SSDP"
			
			
		* Save data for grade 9, science, full sample
			gen treated=tchrtreat_s
		
			tempfile d9sfull
			save "`d9sfull'", replace
	
								
	*GRADE 9 SCIENCE FULL ENDLINE SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean stdIRTscigr9
		
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
	
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	* set test score variable
			gen score = stdIRTscigr9
	
	* Quantile regressions with clustered standard errors (via bootstrapping), but no weights, Table 7
	
			set seed 11001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 10th quantile" if _n==`j'
			
			set seed 12001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 25th quantile" if _n==`j'
			
			set seed 13001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 50th quantile"		if _n==`j' 
			
			set seed 14001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 75th quantile" if _n==`j'
			
			set seed 15001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 90th quantile"  if _n==`j'
			
			* print results
			di "GRADE 9 SCIENCE FULL SAMPLE QUANTILE REGRESSIONS, Table 7"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
						
			
			* With weights, but no clustering (Table S6)
			qreg score treat [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 10th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 25th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 50th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 75th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G9 science full itt 90th quantile weights" if _n==`j'
								
				
			di "GRADE 9 SCIENCE FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering, Table S6"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			
				
		* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile g9_science_quantile
			save "`g9_science_quantile'", replace
			list
						
			clear
		
	
****************************************************************************

****GRADE 10 SCIENCE FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS

****************************************************************************	

		* Bring in endline science grade 10 assessment data 
		use "`e_sci_10'", clear
		
				*create raw score
		egen RawSci10=rowtotal(Sci_*)
		drop Sci_*
		
		* merge in school info
		sort schoolid
		merge m:1 schoolid using "`sch_temp'" //14 unmatched
		drop if _merge==2
		drop _m
		count if stu_serial=="" // 2 observations with missing stu_serial
		drop if stu_serial=="" //2
		sort stu_serial
		
		sort stu_serial
		gen dup=(stu_serial==stu_serial[_n-1])
		tab dup // Two observations with same non-missing stu_serial, this is the 2nd
		l stu_serial if dup==1
		su if stu_serial=="G9N1371" // 2 distinct observations, drop both of them 
		drop if stu_serial=="G9N1371" 
			
		* Merge in data on grade 10 students
			merge 1:1 stu_serial using Grade10_c
			
			
		* Check for availability of father education variable
			tab f_educlevel _m, mi /* Missing for only 8 of matched observations */
			l district schoolid stu_serial if _m==1
			l district schoolid stu_serial if _m==3 & f_educlevel==.
			drop if _m~=3
			drop _m

		* Merge in data on grade 10 students matched with science teachers
			merge 1:1 stu_serial using "`g10scitc'" //246unmatched (238 master, 8 using)
				
			drop if _m==2 //8
			drop _m
			
			
		* Standardize grade 10 science IRT score, 1st for all items then for SSDP-focus items and for raw scores
			svy, over(treat): mean theta_gr10 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTscigr10=(theta_gr10-mean)/sd
			drop mean sd
	
			svy, over(treat): mean theta_gr10_SSDP 
			mat b = e(b)
			gen mean = b[1,1]
			estat sd
			mat sd = r(sd)
			gen sd = sd[1,1]
			gen stdIRTscigr10_SSDP=(theta_gr10_SSDP-mean)/sd
			drop mean sd
			
		* Generate dummy variable for LATE regressions: teacher actually trained in relevant SSDP training
			gen tchrtreat_s=ssdp_s_t*treat

	
		*Label variables
			la var stdIRTscigr10 "Grade 10 Science - All"
			la var stdIRTscigr10_SSDP "Grade 10 Science - SSDP"
			
			
		* Save data for grade 10, science, full sample
			gen treated=tchrtreat_s
		
			tempfile d10sfull
			save "`d10sfull'", replace
			
		
								
	*GRADE 10 SCIENCE FULL ENDLINE SAMPLE, DESCRIPTIVE STATISTICS
	
		* set survey parameters
		svyset schoolid [pweight=sch_wght], strata(dist_stratum)
		
		* Check that standardized score mean is zero in control sample
		svy, over(treat): mean stdIRTscigr10
		
				
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
	
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1
	
	
	* set test score variable
			gen score = stdIRTscigr10
	
	* Quantile regressions with clustered standard errors (via bootstrapping), but no weights, Table 7
	
			set seed 16001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 10th quantile" if _n==`j'
			
			set seed 17001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 25th quantile" if _n==`j'
			
			set seed 18001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 50th quantile" if _n==`j'		
			
			set seed 19001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 75th quantile" if _n==`j'
			
			set seed 20001
			
			bs, cluster(schoolid) strata(dist_stratum) reps(50): qreg score treat, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 90th quantile" if _n==`j'
			
			* print results
			di "GRADE 10 SCIENCE FULL SAMPLE QUANTILE REGRESSIONS, Table 7"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			
				
			* With weights, but no clustering (Table S6)
			qreg score treat [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 10th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 25th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 50th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 75th quantile weights" if _n==`j'
			
			qreg score treat [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			estimates store quantile90_s_g10_check  // for checking pooling later
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "G10 science full itt 90th quantile weights" if _n==`j'
								
				
			di "GRADE 10 SCIENCE FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering, Table S6"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			
					
			
		* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile g10_science_quantile
			save "`g10_science_quantile'", replace
			list
						
			clear
			
	
****************************************************************************

****POOLED GRADES 9 AND 10 MATH FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS 

****************************************************************************	

	* READ IN DATA FOR BOTH YEARS, APPEND, CREATE GRADE AND OUTCOME VARIABLES

		use "`d9mfull'", clear
		gen grade10=0
		
		append using "`d10mfull'"
		replace grade10=1 if grade10==.
		gen IRTmath= stdIRTmatgr9 if grade10==0    // Will be doing pooled estimation only for the full assessment score, not SSDP focus
		replace IRTmath= stdIRTmatgr10 if grade10==1
			
		* check stats
		tab grade10, miss
		tab grade10, summ(IRTmath)
		summ stdIRTmatgr9 stdIRTmatgr10
		
	
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1

	* REPLICATE GRADE 10 MATH FULL SAMPLE RESULTS for quantile 90, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
		
		gen score = IRTmath
		
		qreg score treat [pw=sch_wght] if grade10==1, q(0.9) vce(robust)
			estimates store quantile90p
						
				
			di "checking replication for grade 10 man 90th quantile"
			*etable, column(index) estimates(quantile90_m_g10_check quantile90p) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			// checks out ok
			

	
	* Quantile regressions with clustered standard errors (via bootstrapping), but no weights, Table 7
	
			gen year_dist_stratum = (grade10*1000)+dist_stratum
			
			set seed 21001
						
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 10th quantile" if _n==`j'
			
			set seed 22001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 25th quantile" if _n==`j'
			
			set seed 23001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 50th quantile"	if _n==`j' 	
			
			set seed 24001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 75th quantile" if _n==`j'
			
			set seed 25001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 90th quantile" if _n==`j'
			
			* print results
			di "POOLED MATH FULL SAMPLE QUANTILE REGRESSIONS, Table 7"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			
			
			* With weights, but no clustering (Table S6)
			qreg score treat grade10 [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 10th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 25th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 50th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 75th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled math full itt 90th quantile weights" if _n==`j'
								
			
			di "POOLED MATH FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b)) 
		
		
			* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile pooled_math_quantile
			save "`pooled_math_quantile'", replace
			list
						
			clear
		
		
****************************************************************************

****POOLED GRADES 9 AND 10 SCIENCE FULL ENDLINE SAMPLE:  QUANTILE REGRESSIONS

****************************************************************************	

	* READ IN DATA FOR BOTH YEARS, DEFINE GRADE AND ASSESSMENT VARIABLES
		use "`d9sfull'", clear
		gen grade10=0
		append using "`d10sfull'"
		replace grade10=1 if grade10==.
		gen IRTsci= stdIRTscigr9 if grade10==0
		replace IRTsci= stdIRTscigr10 if grade10==1
		
		* check stats
		tab grade10, miss
		tab grade10, summ(IRTsci)
		summ stdIRTscigr9 stdIRTscigr10
		
			
	* SETUP FOR COLLECTING P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
		gen p1= .   // We will collect unadjusted pvalues for impact estimates in the variable p1
		gen p2= .   // We will collect adjusted pvalues for impact estimates in the variable p2
		gen str20 specification = "to be filled in"  // this will be for verifying that the right p-values are being collected for a given test-taking
		local j= 0   // will increase this by 1 for each specification/pvalue, and use for filling p-values into different observations of the variable p1


	* REPLICATE GRADE 10 science FULL SAMPLE RESULTS for quantile 90, WITHOUT STD ERROR CORRECTIONS FOR LATE (TO VERIFY THAT POOLING INTRODUCES NO ERRORS)
		
		gen score = IRTsci
		
		qreg score treat [pw=sch_wght] if grade10==1, q(0.9) vce(robust)
			estimates store quantile90p
						
				
			di "checking replication for grade 10 sci 90th quantile 
			*etable, column(index) estimates(quantile90_s_g10_check quantile90p) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
			// matches up
		
	
	* Quantile regressions with clustered standard errors (via bootstrapping), but no weights, Table 7
			
			gen year_dist_stratum = (grade10*1000)+dist_stratum
			
			set seed 26001
						
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.1)	
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 10th quantile" if _n==`j'
			
			set seed 27001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.25)			
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 25th quantile" if _n==`j'
			
			set seed 28001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.5)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 50th quantile"	if _n==`j'
			
			set seed 29001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.75)
			estimates store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 75th quantile" if _n==`j'
			
			set seed 30001
			
			bs, cluster(schoolid) strata(year_dist_stratum) reps(50): qreg score treat grade10, q(0.9)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 90th quantile" if _n==`j'
			
			* print results
			di "POOLED SCIENCE FULL SAMPLE QUANTILE REGRESSIONS"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
					
			
		
			* With weights, but no clustering (Table S6)
			qreg score treat grade10 [pw=sch_wght], q(0.1) vce(robust)
			estimates store quantile10
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 10th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.25) vce(robust)
			estimates store quantile25
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 25th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.5) vce(robust)
			estimates store quantile50
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 50th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.75) vce(robust)
			estimate store quantile75
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 75th quantile weights" if _n==`j'
			
			qreg score treat grade10 [pw=sch_wght], q(0.9) vce(robust)
			estimates store quantile90
			test treat 
			local j=`j'+1
			replace p1=r(p) if _n==`j'
			replace p2=r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
			replace specification = "pooled science full itt 90th quantile weights" if _n==`j'
								
				
			di "POOLED MATH FULL SAMPLE QUANTILE REGRESSIONS with weights no clustering"
			etable, column(index) estimates(quantile10 quantile25 quantile50 quantile75 quantile90) showstars showstarsnote keep(treat) cstat(_r_b) /// 
			cstat(_r_se) cstat(_r_ci) mstat(N) ///
			stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  //  we don't show the pooled results in appendix table S6
	
		
			* save the 10 collected p-values and their specifications in a small dataset	
			keep if _n<=10
			keep p1 p2 specification
			tempfile pooled_science_quantile
			save "`pooled_science_quantile'", replace
			list
						
			clear
			
	
***************************************************************************

****MULTIPLE HYPOTHESIS TESTING CORRECTIONS FOR QUANTILE REGRESSIONS

****************************************************************************	

	* READ IN ALL THE COLLECTED P-VALUES
		
		use  "`g9_math_quantile'"
		append using "`g10_math_quantile'"
		append using "`g9_science_quantile'"
		append using "`g10_science_quantile'"
		append using "`pooled_math_quantile'"
		append using "`pooled_science_quantile'"
		gen specnum=_n
		
		list
		
		tempfile tempquantile
		
		save "`tempquantile'", replace
		

					
	* KEEP ONLY THOSE FOR THE GRADE-SPECIFIC ESTIMATION for Table 7

		
		egen keep1= anymatch(specnum), values(1 2 3 4 5 11 12 13 14 15 21 22 23 24 25 31 32 33 34 35)
		
		keep if keep1==1
		
		* p-value adjustments using the raw p-values
		qqvalue p1, method(yekutieli) qvalue(q1)  // this uses unadjusted pvalues for late
		list specification p1 q1
		
		* p-value adjustments using the p-values after using the Lee et al correction based on first stage F stat
		
		qqvalue p2, method(yekutieli) qvalue(q2)  // this uses adjusted pvalues for late
		list specification p2 q2
		
	* KEEP ONLY THOSE FOR pooled ESTIMATION for Table 7 lower half
		
		use "`tempquantile'", clear
		capture drop specnum
		gen specnum=_n
		
		egen keep2= anymatch(specnum), values(41 42 43 44 45 51 52 53 54 55)
		
		keep if keep2==1
		
		* p-value adjustments using the raw p-values
		qqvalue p1, method(yekutieli) qvalue(q1)  // this uses unadjusted pvalues for late
		list specification p1 q1
		
		* p-value adjustments using the p-values after using the Lee et al correction based on first stage F stat
		
		qqvalue p2, method(yekutieli) qvalue(q2)  // this uses adjusted pvalues for late
		list specification p2 q2	
	
		
	* KEEP ONLY THOSE FOR THE GRADE-SPECIFIC ESTIMATION for Table S6

		use "`tempquantile'", clear
		capture drop specnum
		gen specnum=_n
		egen keep3= anymatch(specnum), values(6 7 8 9 10 16 17 18 19 20 26 27 28 29 30 36 37 38 39 40)
		
		keep if keep3==1
			
		* p-value adjustments using the raw p-values
		qqvalue p1, method(yekutieli) qvalue(q1)  // this uses unadjusted pvalues for late
		list specification p1 q1
		
		* p-value adjustments using the p-values after using the Lee et al correction based on first stage F stat
		
		qqvalue p2, method(yekutieli) qvalue(q2)  // this uses adjusted pvalues for late
		list specification p2 q2
	
	
	log close
